library(vegan)
library(fgsea)
library(limma)
library(BioNERO)
library(DESeq2)
library(lmerTest)
library(permutes)
library(tidyverse)
library(immunedeconv)
library(clusterProfiler)
library(ggpubr)
library(ggplot2)
library(pheatmap)
library(RColorBrewer)
library(ComplexHeatmap)
library(EnhancedVolcano)
library(msigdbr)
library(biomaRt)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
library(Orthology.eg.db)
library(DT)RNA-seq-report
Analysis of RNA-seq data
1. Import libraries.
- Import data and create DESeq2 object.
# Import data
meta_data <- read.csv("../data/metadata.tsv", sep = "\t")
meta_data$group <- as.factor(meta_data$group)
meta_data$batch <- as.factor(meta_data$batch)
meta_data$File <- meta_data$sampleid
meta_data <- meta_data[c(1,4,2,3)]
meta_data$File <- paste0(meta_data$File, ".counts")
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = meta_data,
directory = "../data/htseq/",
design= ~ 0 + group + batch)
rownames(ddsHTSeq) <- sapply(str_split(rownames(ddsHTSeq), "\\."), function(x) x[1])- Filter gene table by counts.
keep <- rowSums(counts(ddsHTSeq)>= 10) > ncol(ddsHTSeq)*0.3
ddsHTSeq <- ddsHTSeq[keep,]- Differential expression analysis.
ddsHTSeq$group <- relevel(ddsHTSeq$group, ref = "M")
ddsHTSeq <- DESeq(ddsHTSeq)
contr_1 <- makeContrasts(groupM - groupM_BIF, levels = resultsNames(ddsHTSeq))
contr_2 <- makeContrasts(groupM - groupM_LAC, levels = resultsNames(ddsHTSeq))
contr_3 <- makeContrasts(groupM_BIF - groupM_LAC, levels = resultsNames(ddsHTSeq))
res_1 <- results(ddsHTSeq, contrast=contr_1)
res_2 <- results(ddsHTSeq, contrast=contr_2)
res_3 <- results(ddsHTSeq, contrast=contr_3)5. Get normalized counts table.
COU <- counts(ddsHTSeq, normalized=TRUE)
datatable(COU,
caption = "Table 1. Normalized counts.",
style = "bootstrap5",
extensions = 'Buttons',
options = list(dom = 'Bfrtip',
buttons = c('csv', 'excel'))) %>%
formatStyle(columns = colnames(COU), fontSize = '75%')- Get DEG tables.
get_DEG <- function(res){
DEG <- cbind(as.data.frame(res), COU)
DEG <- DEG[!is.na(DEG$padj),]
DEG <- DEG[order(DEG$log2FoldChange, decreasing = T),]
return(DEG)
}
DEG_BIF <- get_DEG(res_1)
DEG_LAC <- get_DEG(res_2)
DEG_BIF_LAC <- get_DEG(res_3)
datatable(DEG_BIF,
caption = "Table 2. DEG M vs M_BIF.",
style = "bootstrap5",
extensions = 'Buttons',
options = list(dom = 'Bfrtip',
buttons = c('csv', 'excel'))) %>%
formatStyle(columns = colnames(DEG_BIF), fontSize = '75%')datatable(DEG_LAC,
caption = "Table 3. DEG M vs M_LAC.",
style = "bootstrap5",
extensions = 'Buttons',
options = list(dom = 'Bfrtip',
buttons = c('csv', 'excel'))) %>%
formatStyle(columns = colnames(DEG_LAC), fontSize = '75%')datatable(DEG_BIF_LAC,
caption = "Table 4. DEG M_BIF vs M_LAC.",
style = "bootstrap5",
extensions = 'Buttons',
options = list(dom = 'Bfrtip',
buttons = c('csv', 'excel'))) %>%
formatStyle(columns = colnames(DEG_BIF_LAC), fontSize = '75%')- Get volcano plots
VolcanoPlot1 <- EnhancedVolcano(DEG_BIF,
lab = rownames(DEG_BIF),
x = 'log2FoldChange',
y = 'padj', pCutoff = 0.05,
subtitle = "",
title = "M vs M_BIF", labSize = 0)
VolcanoPlot2 <- EnhancedVolcano(DEG_LAC,
lab = rownames(DEG_LAC),
x = 'log2FoldChange',
y = 'padj', pCutoff = 0.05,
subtitle = "",
title = "M vs M_LAC",
labSize = 0)
VolcanoPlot3 <- EnhancedVolcano(DEG_BIF_LAC,
lab = rownames(DEG_BIF_LAC),
x = 'log2FoldChange',
y = 'padj', pCutoff = 0.05,
subtitle = "",
title = "M_BIF vs M_LAC",
labSize = 0)
volcano.all <- ggarrange(VolcanoPlot1,
VolcanoPlot2,
VolcanoPlot3, nrow = 1,
common.legend = T)
volcano.all
- MSigDB GSEA.
pathwaysDF <- msigdbr("mouse", category="H")
pathways <- split(as.character(pathwaysDF$ensembl_gene), pathwaysDF$gs_name)
get_ranks <- function(DEG){
ranks.df <- DEG$log2FoldChange
names(ranks.df) <- rownames(DEG)
names(ranks.df) <- sapply(str_split(names(ranks.df), "\\."), function(x) x[1])
ranks.df <- sort(ranks.df, decreasing = T)
return(ranks.df)
}
ranks.bif <- get_ranks(DEG_BIF)
ranks.lac <- get_ranks(DEG_LAC)
ranks.bif.lac <- get_ranks(DEG_BIF_LAC)
fgsea.bif <- fgsea(pathways = pathways,
stats = ranks.bif,
eps = 0.0,
minSize = 15,
maxSize = 500)
fgsea.lac <- fgsea(pathways = pathways,
stats = ranks.lac,
eps = 0.0,
minSize = 15,
maxSize = 500)
fgsea.bif.lac <- fgsea(pathways = pathways,
stats = ranks.bif.lac,
eps = 0.0,
minSize = 15,
maxSize = 500)
filter_pval <- function(fgsea.res){
fgsea.res <- as.data.frame(fgsea.res)
fgsea.res <- fgsea.res[fgsea.res$padj < 0.05,]
fgsea.res <- fgsea.res[order(fgsea.res$NES, decreasing = T),]
fgsea.res <- fgsea.res[-8]
return(fgsea.res)
}
fgsea.bif.filter <- filter_pval(fgsea.bif)
fgsea.lac.filter <- filter_pval(fgsea.lac)
fgsea.bif.lac.filter <- filter_pval(fgsea.bif.lac)
datatable(fgsea.bif.filter,
caption = "Table 5. DEG M vs M_BIF.",
style = "bootstrap5",
extensions = 'Buttons',
options = list(dom = 'Bfrtip',
buttons = c('csv', 'excel'))) %>%
formatStyle(columns = colnames(fgsea.bif.filter), fontSize = '75%')datatable(fgsea.lac.filter,
caption = "Table 6. DEG M vs M_LAC.",
style = "bootstrap5",
extensions = 'Buttons',
options = list(dom = 'Bfrtip',
buttons = c('csv', 'excel'))) %>%
formatStyle(columns = colnames(fgsea.lac.filter), fontSize = '75%')datatable(fgsea.bif.lac.filter,
caption = "Table 7. DEG M_BIF vs M_LAC.",
style = "bootstrap5",
extensions = 'Buttons',
options = list(dom = 'Bfrtip',
buttons = c('csv', 'excel'))) %>%
formatStyle(columns = colnames(fgsea.bif.lac.filter), fontSize = '75%')get_subset <- function(df, group){
df.sbs <- df[c(1,6)]
df.sbs$group <- group
return(df.sbs)
}
fgsea.bif.sbs <- get_subset(fgsea.bif.filter, "M vs M_BIF")
fgsea.lac.sbs <- get_subset(fgsea.lac.filter, "M vs M_LAC")
fgsea.bif.lac.sbs <- get_subset(fgsea.bif.lac.filter, "M_BIF vs M_LAC")
fgsea.all <- rbind(fgsea.bif.sbs, fgsea.lac.sbs, fgsea.bif.lac.sbs)
fgsea.all <- spread(fgsea.all, group, NES, fill = 0)
rownames(fgsea.all) <- fgsea.all$pathway
fgsea.all <- fgsea.all[-1]
heatmap.hallmark <- pheatmap::pheatmap(as.matrix(fgsea.all), cutree_rows = 7, cutree_cols = 2, display_numbers = T)
heatmap.hallmark